Antibiotics gene linkage graph

Load generic libraries

source('configuration.r')

Load plot specific libraries

library(foreach)
library(readr)
suppressMessages(library(igraph))
library(ggraph)
library(ggalluvial)

Create graph edge data

dat <- read.table('../tables/antibiotics_gene_linkage.tsv', head=FALSE, stringsAsFactors = FALSE) %>% 
  mutate(id=paste0(V1,':',V2, ':', V3)) %>% 
  mutate(V6=str_replace(V6, 'PheCmlA5', 'Phe')) %>% 
  mutate(V6=str_replace(V6, 'Far1_Fcd', 'Far1_Bla'))

edges.full <- foreach(id=unique(dat$id), .combine=rbind) %do% {
    tmp <- dat[dat$id == id, ]
    if(nrow(tmp) < 2) {
        NULL
    }else{
        edge <- data.frame(t(combn(sort(tmp$V6), 2)), id=id)
        edge$dist <- foreach(r=1:nrow(edge), .combine = c) %do% {
          g1 <- sort(tmp[tmp$V6 == edge[r, ]$X1, 4:5])
          g2 <- sort(tmp[tmp$V6 == edge[r, ]$X2, 4:5])
          min(abs(g1[2]-g2[1]), abs(g1[1]-g2[2]))
        }
        edge
    }
}
## save for ad hoc analysis
write.table(edges.full, '../output_tables/ar_gene_linkage_edge_list_uncollapsed.tsv', row.names = F, quote=F, sep='\t')

Function for ploting

get_graph_data <- function(pattern='', reverse=FALSE, dist.fil=Inf){
  if(reverse){
    edges.collapsed <- filter(edges.full, !str_detect(id, pattern))
  }else{
    edges.collapsed <- filter(edges.full, str_detect(id, pattern)) 
  }
    
  ## filtering and collapse into edge weights
  edges.collapsed <- edges.collapsed %>% 
    group_by(X1, X2) %>% 
    summarise(n=length(dist), weight=sum(dist<dist.fil)) %>% 
    filter(n>1)   ### keep edges with more than 1 support
  
  ## Run MCL graph clustering
  write.table(edges.collapsed %>% filter(weight>0) %>% select(-n), 
              'tmp_edges.tsv', quote=F, sep='\t', col.names = F, row.names = F)
  system('/mnt/software/bin/mcl tmp_edges.tsv  --abc -o tmp_mcl.tsv -overlap keep -I 3 2>/dev/null')
  mcl <- read_tsv('tmp_mcl.tsv', col_names = F)
  grp <- foreach(c=1:nrow(mcl), .combine = 'rbind') %do%{
    tmp <- as.character(na.exclude(as.character(mcl[c,])))
    data.frame(vertex=tmp, grp=c, stringsAsFactors = F)
  } %>% data.frame(row.names = 1)
  grp$grp[grp$grp >= (which(table(grp) < 3)[1])] <- NA ## no annotation for clusters with 2 genes only
  
  ## generate graph data
  g <- graph_from_data_frame(edges.collapsed[,1:3], directed=FALSE)
  ## vertices
  colors <- pal_simpsons('springfield')(16)
  n.colors <- colors[as.numeric(as.factor(str_replace(V(g)$name, '.*_', '')))]
  V(g)$color <- n.colors
  V(g)$community <- grp[V(g)$name,]##cluster_walktrap(g)$membership
  V(g)$Type <- str_replace(V(g)$name, '.*_', '')
  ## Edges
  E(g)$width <- edges.collapsed$n
  E(g)$community <-
    apply(as.data.frame(get.edgelist(g, names=FALSE)), 1,
                      function(x)
                        ifelse(V(g)$community[x[1]] == V(g)$community[x[2]],
                                         LETTERS[V(g)$community[x[1]]], NA))
  ## format vertex names
  aux <- function(x){
      if(length(x) > 2){
          paste0(x[1],'_',x[2])
      }else{
          x[1]
      }
  }
  V(g)$name <- sapply(str_split(V(g)$name, '_'), aux)
  #### carbapenemase
  args <- read.table('../metadata/Bla_Carb.dat', head=F, stringsAsFactors=F)[,1]
  carb <- setNames(rep('No', length(V(g)$name)),V(g)$name)
  carb[names(carb) %in% args] <- 'Yes'
  V(g)$carb <- carb
  
  g
}

Main figure of all genes

g <- get_graph_data(dist.fil = 5e3)

g_cols <- c((pal_lancet("lanonc")(9))[c(2:8,1)], pal_ucscgb("default")(26))[-c(2,7,8,9,10,12,17,18,19,20,21,23,24,26,27)] 
## plot(1:19, 1:19, type="p", pch=65:(65+19-1), cex=2, col=g_cols) ## get legend alphabets
layout = create_layout(g, layout = 'igraph', algorithm = 'kk', kkconst=0.1)
p <- ggraph(layout)+#, circular=TRUE) +
  geom_edge_arc(aes(width=((!is.na(community))+1), alpha=as.factor(is.na(community)), color=community),
                curvature = 0.0,
                #alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  scale_edge_alpha_manual(values=c(0.8, 0.3), guide=FALSE) +
  geom_node_point(aes(fill=Type, color=carb),size=5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(1,1.5)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void() 
p

ggsave('../plots/fig3a.ar_gene_linkage_graph.pdf', p, width=15, height=17)

Sankey plot

Format data

edge.dat <- data.frame(v=V(g)$name, g=LETTERS[V(g)$community]) %>% filter(!is.na(g))

genome.info <- read.table('../tables/genome_info.dat', sep = '\t') 
merge(dat, genome.info, by=c(1,2), all.x=TRUE) %>% 
  filter(!is.na(V6.y) | V1=='plasmid') %>% ## only take the high/medium quality ones
  select(Species=V1, gene=V6.x) %>% mutate(gene=str_replace(gene, '_.*','')) -> sp.dat

sankey.dat <- 
  merge(sp.dat, edge.dat, by.x=2, by.y=1, all.y=T)  %>% 
  mutate(Species=str_replace(Species,'_',' ')) %>% 
  group_by(Species, g) %>% 
  count %>% 
  mutate(Antibiotics_cluster=as.character(g)) %>% 
  filter(n>3) %>% 
  ungroup() 

sankey.dat$Species <- factor(sankey.dat$Species, ordered = TRUE, levels = c("Acinetobacter baumannii",
"Acinetobacter sp.",
"Burkholderia lata",
"Enterobacter cloacae",
"Klebsiella pneumoniae",
"Pseudomonas aeruginosa",
"Corynebacterium striatum",
"Enterococcus faecalis",
"Enterococcus faecium",
"Enterococcus sp.",
"Staphylococcus aureus",
"Staphylococcus capitis",
"Staphylococcus cohnii",
"Staphylococcus epidermidis",
"Staphylococcus haemolyticus",
"Staphylococcus hominis",
"Staphylococcus lugdunensis",
"Staphylococcus warneri",
"plasmid"))

sankey.dat$Antibiotics_cluster <- factor(sankey.dat$Antibiotics_cluster, ordered=TRUE, levels=c(
  "A", "G",  "J", "K", "L", "M", "N", "B", "C", "D", "E", "F", "H", "I"
))

##  mutate(Species=sapply(Species, function(x) ifelse(x=='plasmid', NA, x)))

Plot

p <- ggplot(subset(sankey.dat, Species!='plasmid'),aes(y=log10(n), axis1=Species, axis2=Antibiotics_cluster)) +
  geom_alluvium(aes(fill=Antibiotics_cluster), width=1/5)  +
  geom_stratum(width=1/5, fill=NA) +
  geom_text(stat = "stratum", label.strata = TRUE, size=6, fontface = "italic", nudge_x=-0.12, hjust=1) +
  scale_fill_manual(values=g_cols[match(levels(sankey.dat$Antibiotics_cluster) , LETTERS[1:14])]) +
  scale_x_discrete(limits = c("Species", "Antibiotics Cluster"), expand = c(.5, .05)) +
  theme_void() +
  theme()
  
p

ggsave('../plots/fig3b.ar_gene_linkage_sankey.pdf', p, width=25, height=25)

Supplementary figures

Staphylococcus aureus network

g <- get_graph_data('Staphylococcus_aureus')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size=5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()
p

ggsave('../plots/sup11.staph_aureus_ar_gene_linkage_graph.pdf', p, width=5, height=5)

Genome network

g <- get_graph_data('plasmid', reverse = TRUE)
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size= 5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()
p

ggsave('../plots/sup12.genome_ar_gene_linkage_graph.pdf', p, width=16, height=16)

Plasmid network

g <- get_graph_data('plasmid')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()
p

ggsave('../plots/sup12.plasmid_ar_gene_linkage_graph.pdf', p, width=15, height=10)

Session informaton

sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
## 
## Matrix products: default
## BLAS: /usr/lib64/R/lib/libRblas.so
## LAPACK: /usr/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggnewscale_0.1.9000 ggalluvial_0.9.1    ggraph_1.0.2       
##  [4] igraph_1.2.2        readr_1.1.1         foreach_1.4.4      
##  [7] ggsci_2.9           reshape2_1.4.3      stringr_1.3.0      
## [10] tibble_2.0.1        tidyr_0.8.2         dplyr_0.8.0.1      
## [13] gridExtra_2.3       ggplot2_3.1.0      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.5  purrr_0.3.0       colorspace_1.3-2 
##  [4] htmltools_0.3.6   viridisLite_0.3.0 yaml_2.1.18      
##  [7] utf8_1.1.4        rlang_0.3.1       pillar_1.3.1     
## [10] glue_1.3.0        withr_2.1.2       tweenr_1.0.0     
## [13] plyr_1.8.4        munsell_0.5.0     gtable_0.2.0     
## [16] codetools_0.2-15  evaluate_0.10.1   labeling_0.3     
## [19] knitr_1.20        fansi_0.4.0       Rcpp_1.0.0       
## [22] scales_1.0.0      backports_1.1.2   farver_1.0       
## [25] ggforce_0.1.3     hms_0.4.2         digest_0.6.18    
## [28] stringi_1.3.1     ggrepel_0.8.0     rprojroot_1.3-2  
## [31] cli_1.0.1         tools_3.4.4       magrittr_1.5     
## [34] lazyeval_0.2.1    crayon_1.3.4      pkgconfig_2.0.2  
## [37] MASS_7.3-49       assertthat_0.2.0  rmarkdown_1.9    
## [40] iterators_1.0.9   viridis_0.5.1     R6_2.4.0         
## [43] units_0.6-1       compiler_3.4.4